{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2024-07-01T09:23:29.351065Z",
     "start_time": "2024-07-01T09:23:22.876106Z"
    }
   },
   "source": [
    "import os \n",
    "import scipy.io as sio\n",
    "import scipy\n",
    "import pickle\n",
    "import pandas as pd\n",
    "\n",
    "mat_dir_path = '/mnt/Data/engs2588/data/PhysioNet2017/raw/'  # Replace with your actual directory path\n",
    "reference_file_path = '/mnt/Data/engs2588/data/PhysioNet2017/REFERENCE.csv'\n",
    "\n",
    "# Read the reference CSV file\n",
    "reference_df = pd.read_csv(reference_file_path, header=None)\n",
    "reference_df.columns = ['Recording', 'Label']\n",
    "\n",
    "# Initialize a dictionary to store the data\n",
    "data_dict = {}\n",
    "\n",
    "# List all .mat files in the directory that match the pattern A*****.mat and sort them\n",
    "mat_files = sorted([f for f in os.listdir(mat_dir_path) if f.startswith('A') and f.endswith('.mat')])\n",
    "\n",
    "# Read each .mat file and store the data in the dictionary along with the label\n",
    "for mat_file in mat_files:\n",
    "    file_path = os.path.join(mat_dir_path, mat_file)\n",
    "    mat_data = scipy.io.loadmat(file_path)\n",
    "    key = mat_file.split('.')[0]  # Remove the .mat extension to match with reference keys\n",
    "    label = reference_df.loc[reference_df['Recording'] == key, 'Label'].values[0]\n",
    "    data_dict[key] = {'data': mat_data['val'].flatten(), 'label': label}  # Flatten the data if needed\n",
    "\n",
    "\n"
   ],
   "outputs": [],
   "execution_count": 9
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "#",
   "id": "95a27f2e6272fc76"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-07-01T09:23:31.780782Z",
     "start_time": "2024-07-01T09:23:31.509516Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Define the output .pkl file path\n",
    "output_file_path = '../data/ECG.pkl'\n",
    "# Save the data dictionary to a .pkl file\n",
    "with open(output_file_path, 'wb') as pkl_file:\n",
    "    pickle.dump(data_dict, pkl_file)\n"
   ],
   "id": "fa1fa230ea3a1275",
   "outputs": [],
   "execution_count": 10
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "",
   "id": "75cae5e4aaed2054"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
